# ------------------------------------------------------
# process data to produce cleaned regression tables 
# here we measure the distribution of peer depression across multilayered peer groups
# depending on different conditions 
# conditions for data pull : 0. baseline setting across all different peer types
# conditions for data pull : 1. different reference groups for grade peer measures 
# conditions for data pull : 2. exclude self for grade peer measures 
# by BK Lee (bl11@indiana.edu)
# ------------------------------------------------------

load_packages = c('rio','here','dplyr','data.table','fst',
	'drake','moments','igraph','Matrix','car','lfe','expss','survey',
	'RStata','stargazer','lfe')

invisible(lapply(load_packages, library, character.only = TRUE))

# ------ project locations (both are blank in this copy of the script)
# data_path    : root folder that contains the 'rawdata' and 'processed' sub-folders
# project_path : folder that contains rdata_util.R
data_path <- ""
project_path <- ""

# make the data folder the working directory for the rest of the script
setwd(data_path)

# ---- bring in the project's custom data-processing functions
source(file.path(project_path, 'rdata_util.R'))

# ---- depression indicators
# response coding for every item:
#   [[ 0 : never, 1:rarely, 2:occasionally, 3:often, 4:everyday, 9:multiple response ]]
all_dep_indicators <- c(
  "s60i",  # did you have trouble eating, or a poor appetite?
  "s60j",  # did you have trouble falling asleep or staying asleep?
  "s60k",  # did you feel depressed or blue?
  "s60l",  # did you have trouble relaxing?
  "s60m",  # were you moody?
  "s60n"   # did you cry a lot?
)

# ---- control variable names
# control.varlist : variables handed to measure_peer_control() further down
control.varlist <- c(
  'female',
  'white', 'black', 'hispanic', 'other',
  'immig_1st', 'immig_2nd', 'immig_3rd',
  'family_two', 'family_one', 'family_other',
  'pa_educ'
)

# ind_controls : individual-level control names
ind_controls <- c(
  'female',
  'black', 'hispanic', 'other',
  'immig_1st', 'immig_2nd',
  'family_step', 'family_one', 'family_other',
  'pvt', 'pa_educ', 'assistance', 'sibsize'
)

# peer_controls : the same kind of names carrying a '.mean' suffix
peer_controls <- paste0(
  c(
    'female',
    'black', 'hispanic', 'other',
    'immig_1st', 'immig_2nd',
    'family_one', 'family_other',
    'pa_educ'
  ),
  '.mean'
)

# ---- processed inputs
# DEP_clean_inhome_inschool.do has to be run beforehand: it produces the three
# v_*.dta files read from <data_path>/processed below
processed_data_wave1 <- data.table(
  import(file.path(data_path, 'processed', 'v_wave1.dta'))
)
processed_data_wave2 <- data.table(
  import(file.path(data_path, 'processed', 'v_wave2.dta'))
)
processed_data_inschool <- data.table(
  import(file.path(data_path, 'processed', 'v_inschool.dta'))
)

# ---- raw inputs read from <data_path>/rawdata
# the raw in-school file is passed through impute_aid_inschool() right after import
raw_data_inschool <- impute_aid_inschool(
  data.table(import(file.path(data_path, 'rawdata', 'inschool.dta')))
)
weight_wave1 <- data.table(
  import(file.path(data_path, 'rawdata', 'weight_wave1.dta'))
)
weight_wave2 <- data.table(
  import(file.path(data_path, 'rawdata', 'weight_wave2.dta'))
)

# network files: course-taking network (edunet) and in-school friendship nominations (sfriend)
raw_data_coursemate_network <- data.table(
  import(file.path(data_path, 'rawdata', 'edunet.xpt'))
)
raw_data_inschool_friendship <- data.table(
  import(file.path(data_path, 'rawdata', 'sfriend.dta'))
)

# ---- additional cleaning of the processed in-school file
cleaned_data_inschool <- process_inschool(processed_data_inschool)

# ---- prepare the raw in-school data for edge creation
processed_data_depress <- process_depress(
  raw_data_inschool,
  dep_indicators = all_dep_indicators
)
processed_peer_grade <- process_grade(raw_data_inschool)
processed_peer_club <- process_club(raw_data_inschool)

# ---- peer-level 'edges', one edgelist per peer definition
edgelist_grade <- edge_peer_grade(data = processed_peer_grade)
edgelist_club <- edge_peer_club(club_data = processed_peer_club)
edgelist_local <- edge_peer_course_network(
  data = raw_data_coursemate_network,
  filter = 'all',
  type = 'U|re'
)

# friendship edges: the four calls are identical except for `direction`
# ('all', 'in', 'out', 'both'); self is excluded and same_sex is 'all' in each
edgelist_friendship_all <- edge_peer_friendship(
  friendship_data = raw_data_inschool_friendship,
  inschool_data = raw_data_inschool,
  exclude_self = TRUE,
  same_sex = 'all',
  direction = 'all'
)
edgelist_friendship_in <- edge_peer_friendship(
  friendship_data = raw_data_inschool_friendship,
  inschool_data = raw_data_inschool,
  exclude_self = TRUE,
  same_sex = 'all',
  direction = 'in'
)
edgelist_friendship_out <- edge_peer_friendship(
  friendship_data = raw_data_inschool_friendship,
  inschool_data = raw_data_inschool,
  exclude_self = TRUE,
  same_sex = 'all',
  direction = 'out'
)
edgelist_friendship_both <- edge_peer_friendship(
  friendship_data = raw_data_inschool_friendship,
  inschool_data = raw_data_inschool,
  exclude_self = TRUE,
  same_sex = 'all',
  direction = 'both'
)

# --- peer type list
# each entry pairs an edgelist with the label (peer_name) that ends up in the
# exported file name; positions 1..7 are fixed, and position 1 (grade) is
# addressed by index later in the script
list_peer_type <- list(
  list(edgelist = edgelist_grade,           peer_name = 'grade'),
  list(edgelist = edgelist_club,            peer_name = 'club'),
  list(edgelist = edgelist_friendship_in,   peer_name = 'friend_in'),
  list(edgelist = edgelist_friendship_out,  peer_name = 'friend_out'),
  list(edgelist = edgelist_friendship_all,  peer_name = 'friend_all'),
  list(edgelist = edgelist_friendship_both, peer_name = 'friend_both'),
  list(edgelist = edgelist_local,           peer_name = 'local_position')
)

# ------------------------------------------------------
# conditions for data pull : 0. baseline setting across all different peer types
#
# For every entry of list_peer_type this section builds one merged table and
# writes it to <data_path>/processed as
#   reg_main_<peer_name>_<m_reference>_<m_selfinclude>.dta
m_filter <- 'nofilter'
m_reference <- 'all'
m_selfinclude <- 'include_self'
# NOTE(review): m_size and list_peer_hetero are not referenced anywhere in this
# section; kept in case the sourced helpers read them as globals - TODO confirm
m_size <- 'drop_none'

list_peer_hetero <- c('peer_depress_mean','peer_depress_median','peer_depress_q95','peer_depress_q05')

# seq_along() instead of 1:length(): an empty list then yields zero iterations
# rather than the indices c(1, 0)
for (i in seq_along(list_peer_type)) {
  message("now process : ",paste0('reg_main_','multiple','_',m_reference,'_',m_selfinclude,' for ',i))
  edgelist_in <- list_peer_type[[i]]$edgelist
  peer_name <- list_peer_type[[i]]$peer_name

  # create peer-level controls
  controls_peer <- measure_peer_control(edgelist = edgelist_in, cleaned_data_inschool, control.varlist)

  # create peer-level 'heterogeneity' + mean measures
  peer_depress <- measure_peer_depress_bottom(
    edgelist = edgelist_in, processed_data_depress,
    include_self = m_selfinclude, filter = m_filter, reference = m_reference
  )

  # create self-lagged controls
  controls_rank <- measure_self_depress_rank(edgelist = edgelist_in, processed_data_depress)

  # merge survey waves, weights, cleaned in-school data and the peer measures on 'aid'
  reg_data <- combine_data(key = 'aid', list_data_tables = list(
    processed_data_wave1, processed_data_wave2, weight_wave1, weight_wave2,
    cleaned_data_inschool, processed_data_depress,
    peer_depress, controls_peer, controls_rank))

  # replace spaces and dots in column names with underscores before the .dta export
  # (presumably because Stata variable names cannot contain them - TODO confirm)
  names(reg_data) <- gsub(' ','_',names(reg_data))
  names(reg_data) <- gsub('\\.','_',names(reg_data))

  export(reg_data, file.path(data_path,'processed',
    paste0('reg_main_',peer_name,'_',m_reference,'_',m_selfinclude,'.dta')))
}

# ------------------------------------------------------
# conditions for data pull : 1. different reference groups for grade peer measures
#
# Same pipeline as the baseline pull, restricted to the grade peer type and
# repeated once per reference group. Note that the 'all' pass writes
# reg_main_grade_all_include_self.dta, the same file name the baseline pull
# uses for grade peers.
m_filter <- 'nofilter'
m_selfinclude <- 'include_self'
m_size <- 'drop_none'
i <- 1 # grade peer

# the peer type is fixed for this section, so look it up once
edgelist_in <- list_peer_type[[i]]$edgelist
peer_name <- list_peer_type[[i]]$peer_name

for (m_reference in c('grade','all','school')) {
  message(
    "now run : ",
    paste0('reg_main_','multiple','_',m_reference,'_',m_selfinclude,' for ',i)
  )

  # peer-level controls
  controls_peer <- measure_peer_control(
    edgelist = edgelist_in,
    cleaned_data_inschool,
    control.varlist
  )

  # peer-level 'heterogeneity' + mean measures, relative to the current reference group
  peer_depress <- measure_peer_depress_bottom(
    edgelist = edgelist_in,
    processed_data_depress,
    include_self = m_selfinclude,
    filter = m_filter,
    reference = m_reference
  )

  # self-lagged controls
  controls_rank <- measure_self_depress_rank(
    edgelist = edgelist_in,
    processed_data_depress
  )

  # merge everything on 'aid'
  reg_data <- combine_data(
    key = 'aid',
    list_data_tables = list(
      processed_data_wave1, processed_data_wave2,
      weight_wave1, weight_wave2,
      cleaned_data_inschool, processed_data_depress,
      peer_depress, controls_peer, controls_rank
    )
  )

  # column names: spaces first, then dots, become underscores
  names(reg_data) <- gsub('\\.','_', gsub(' ','_',names(reg_data)))

  export(
    reg_data,
    file.path(
      data_path, 'processed',
      paste0('reg_main_',peer_name,'_',m_reference,'_',m_selfinclude,'.dta')
    )
  )
}

# ------------------------------------------------------
# conditions for data pull : 2. exclude self for grade peer measures
#
# Single run of the pipeline for the grade peer type with
# m_selfinclude = 'exclude_self'; the result is written to
# <data_path>/processed/reg_main_<peer_name>_<m_reference>_<m_selfinclude>.dta

m_filter <- 'nofilter'
m_reference <- 'all'
m_selfinclude <- 'exclude_self'
m_size <- 'drop_none'
i <- 1 # grade peer

message(
  "now run : ",
  paste0('reg_main_','multiple','_',m_reference,'_',m_selfinclude,' for ',i)
)
edgelist_in <- list_peer_type[[i]]$edgelist
peer_name <- list_peer_type[[i]]$peer_name

# peer-level controls
controls_peer <- measure_peer_control(
  edgelist = edgelist_in,
  cleaned_data_inschool,
  control.varlist
)

# peer-level 'heterogeneity' + mean measures
peer_depress <- measure_peer_depress_bottom(
  edgelist = edgelist_in,
  processed_data_depress,
  include_self = m_selfinclude,
  filter = m_filter,
  reference = m_reference
)

# self-lagged controls
controls_rank <- measure_self_depress_rank(
  edgelist = edgelist_in,
  processed_data_depress
)

# merge everything on 'aid'
reg_data <- combine_data(
  key = 'aid',
  list_data_tables = list(
    processed_data_wave1, processed_data_wave2,
    weight_wave1, weight_wave2,
    cleaned_data_inschool, processed_data_depress,
    peer_depress, controls_peer, controls_rank
  )
)

# column names: spaces first, then dots, become underscores
names(reg_data) <- gsub('\\.','_', gsub(' ','_',names(reg_data)))

export(
  reg_data,
  file.path(
    data_path, 'processed',
    paste0('reg_main_',peer_name,'_',m_reference,'_',m_selfinclude,'.dta')
  )
)




